Jupyter Data Science


In [9]:
%matplotlib inline
import matplotlib.pyplot as plt
# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 (renamed to
# 'seaborn-v0_8') and later removed. Prefer the new name when this Matplotlib
# provides it, and fall back to the old name on older installations.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

In [10]:
import os
from urllib.request import urlretrieve

import pandas as pd

URL = 'https://data.seattle.gov/api/views/65db-xm6k/rows.csv?accessType=DOWNLOAD'

def get_fremont_data(filename='Fremont.csv', url=URL, force_download=False):
    """Download the Fremont CSV if needed and load it into a DataFrame.

    Parameters
    ----------
    filename : str, optional
        Local path of the cached CSV. It is downloaded only when it does not
        already exist (or when ``force_download`` is true).
    url : str, optional
        Location the CSV is fetched from when a download is required.
    force_download : bool, optional
        When true, download again even if ``filename`` already exists.

    Returns
    -------
    pandas.DataFrame
        Indexed by the parsed 'Date' column, with the two data columns
        relabelled 'West' and 'East' plus a 'Total' column holding their sum.
    """
    if force_download or not os.path.exists(filename):
        urlretrieve(url, filename)
    # Read the file that was just checked/downloaded, not a hard-coded name,
    # so a non-default `filename` is honoured.
    data = pd.read_csv(filename, index_col='Date', parse_dates=True)
    # The file is expected to carry exactly two data columns besides 'Date';
    # pandas raises a length-mismatch error otherwise.
    data.columns = ['West', 'East']
    data['Total'] = data['West'] + data['East']
    return data

In [11]:
# Load the dataset (downloading it first if no local copy exists) and
# preview the first five rows.
data = get_fremont_data()
data.head(n=5)


Out[11]:
West East Total
Date
2012-10-03 00:00:00 4.0 9.0 13.0
2012-10-03 01:00:00 4.0 6.0 10.0
2012-10-03 02:00:00 1.0 1.0 2.0
2012-10-03 03:00:00 2.0 3.0 5.0
2012-10-03 04:00:00 6.0 1.0 7.0

In [12]:
# Unconditionally re-download the CSV, overwriting the local 'Fremont.csv'.
# `urlretrieve` is already imported in the cell that defines URL, so it is not
# imported again here. Note: `data` is not reloaded by this cell.
urlretrieve(URL, 'Fremont.csv')


Out[12]:
('Fremont.csv', <http.client.HTTPMessage at 0x10ae02f28>)

In [13]:
# 'W' resamples the rows into weekly bins; sum() totals each column per week
# before plotting. (%matplotlib inline is already set in the first cell.)
data.resample('W').sum().plot();



In [14]:
# Weekly totals for every column, drawn as one line per column.
weekly_sums = data.resample('W').sum()
weekly_sums.plot();



In [15]:
# 'Total' is already computed by get_fremont_data(), so it is not rebuilt
# (and `data` is not mutated) here.
# Daily sums, then a sum over a trailing window of 365 daily values.
ax = (
    data.resample('D').sum()
        .rolling(365).sum()
        .plot()
)
ax.set_ylim(0, None);  # anchor the y-axis at zero; upper limit stays automatic



In [16]:
# Average every column over all rows that share the same time of day.
by_time_of_day = data.groupby(data.index.time).mean()
by_time_of_day.plot();



In [17]:
# Rows: time of day; columns: calendar date; cells: 'Total' aggregated with
# pivot_table's default (mean).
pivoted = data.pivot_table(
    values='Total',
    index=data.index.time,
    columns=data.index.date,
)
# Preview the top-left 5x5 corner.
pivoted.head().iloc[:, :5]


Out[17]:
2012-10-03 2012-10-04 2012-10-05 2012-10-06 2012-10-07
00:00:00 13.0 18.0 11.0 15.0 11.0
01:00:00 10.0 3.0 8.0 15.0 17.0
02:00:00 2.0 9.0 7.0 9.0 3.0
03:00:00 5.0 3.0 4.0 3.0 6.0
04:00:00 7.0 8.0 9.0 5.0 3.0

In [18]:
# One line per column of `pivoted`; the very low alpha lets overlapping lines
# build up visible density, and the legend is suppressed.
pivoted.plot.line(legend=False, alpha=0.01);



In [ ]: